#include "overlap_filter.hpp"

#include <cassert>
#include <algorithm>
#include <array>
#include <unordered_set>
#include <unordered_map>
#include <thread>
#include <sstream>
#include <iostream>
#include <regex>

#include "utility.hpp"

OverlapFilter::OverlapFilter() {
   
}

OverlapFilter::~OverlapFilter() {
    //DeletePtrContainer(modified_overlaps_);
}

bool OverlapFilter::ParseArgument(int argc, char *const argv[]) {
    return  GetArgumentParser().ParseArgument(argc, argv);
}

ArgumentParser OverlapFilter::GetArgumentParser() {
    ArgumentParser ap("fsa_ol_filter", "Filters out low-quality overlaps for assembling", "1.0");
    ap.AddNamedOption(min_length_, "min_length", " minimum length of reads");
    ap.AddNamedOption(max_length_, "max_length", "maximum length of reads");
    ap.AddNamedOption(min_identity_, "min_identity", " minimum identity of overlaps");
    ap.AddNamedOption(min_aligned_length_, "min_aligned_length", "minimum aligned length of overlaps");
    ap.AddNamedOption(max_overhang_, "max_overhang", " maximum overhang of overlaps, negative number = determined by the program.");
    ap.AddNamedOption(min_coverage_, "min_coverage", "minimum base coverage, negative number = determined by the program.");
    ap.AddNamedOption(max_coverage_, "max_coverage", "maximum base coverage, negative number = determined by the program");
    ap.AddNamedOption(max_diff_coverage_, "max_diff_coverage", "maximum difference of base coverage, negative number = determined by the program");
    ap.AddNamedOption(coverage_discard_, "coverage_discard", "discard ratio of base coverage. If max_coverage or max_diff_coverage is negative, it will be reset to (100-coverage_discard)th percentile.");
    ap.AddNamedOption(overlap_file_type_, "overlap_file_type", "overlap file format. \"\" = filename extension, \"m4\" = M4 format, \"paf\" = PAF format generated by minimap2, \"ovl\" = OVL format generated by FALCON.", "\"|m4|m4a|paf|ovl\"");
    ap.AddNamedOption(bestn_, "bestn", " output best n overlaps on 5' or 3' end for each read");
    //ap.AddNamedOption(genome_size_, "genome_size", "genome size, working with `--coverage` to determine the maximum length of reads", "\\d+[kKmMgG]?", ParamToGenomeSize);
    ap.AddNamedOption(genome_size_, "genome_size", "genome size. It determines the maximum length of reads with coverage together", "INT", ParamToGenomeSize);
    ap.AddNamedOption(coverage_, "coverage", "coverage. It determines the maximum length of reads with genome_size together");
    ap.AddNamedOption(output_directory_, "output_directory", "directory for output files");
    ap.AddNamedOption(thread_size_, "thread_size", "number of threads");



    ap.AddPositionOption(ifname_, "overlaps", "input filename");
    ap.AddPositionOption(ofname_, "filtered_overlaps", "ouput filename");
    return ap;
}

void OverlapFilter::Usage() {
    std::cout << GetArgumentParser().Usage();
}


void OverlapFilter::Run() {


    LOG(INFO)("Start");
    
    PrintArguments();
    
    if ( min_identity_ < 0 || 
         (min_length_ < 0 || (genome_size_ > 0 && coverage_ > 0)) ||
         min_length_ < 0 || min_aligned_length_ < 0 ||
         max_overhang_ < 0) {

        LOG(INFO)("Auto select params");
        AutoSelectParams();
    }

    LOG(INFO)("Load overlap file");
    LoadOverlaps(ifname_);
    
    //LOG(INFO)("Check simple constraints");
    //FilterSimpleMt();

    if (genome_size_ > 0 && coverage_ > 0) {
        LOG(INFO)("Keep longest %dX", coverage_);
        FilterLongest();
    }


    LOG(INFO)("Group overlaps and remove duplicated");
    GroupAndFilterDuplicateMt();

    LOG(INFO)("Check Local");
    FilterLocalMt();

    LOG(INFO)("Check Coverage");
    FilterCoverageMt();

    LOG(INFO)("Check Support");
    FilterLackOfSupportMt();

    LOG(INFO)("Remove contained reads");
    FilterContainedMt();

    LOG(INFO)("Select BestN overlaps");
    FilterBestNMt();
    

    LOG(INFO)("Save Overlaps: %s", ofname_.c_str());
    SaveOverlaps(ofname_);

    LOG(INFO)("Dump");
    Dump();

    LOG(INFO)("End");

}

void OverlapFilter::LoadOverlaps(const std::string &fname) {
    auto filter_simple = [&](Overlap& o) {
        if (o.identity_ >= min_identity_ && o.a_.id != o.b_.id &&
            o.a_.len >= min_length_ && o.b_.len >= min_length_ &&
            o.a_.len <= max_length_ && o.b_.len <= max_length_ &&
            o.AlignedLength() >= (size_t)min_aligned_length_ &&
            o.Location(max_overhang_) != Overlap::Loc::Abnormal ) {

            // ModifyEnd(o);
            return true;
        } else {
            return false;
        }
    };

    ol_store_.Load(fname, overlap_file_type_, (size_t)thread_size_, filter_simple);
    
    // MMM
    // for (auto &o : ol_store_.Get()) {
    //    reserved_overlaps_.insert(&o);
    // }

    if (ol_store_.Size() > 0) {
        LOG(INFO)("Overlap size: %zd", ol_store_.Size());
    } else {
        LOG(FATAL)("No overlap was loaded");
    }
}


void OverlapFilter::SaveOverlaps(const std::string &fname) {
    
    auto filter = [&](const Overlap& o)->bool {
        return GetOlReason(o).type == OlReason::RS_OK;
        // MMM
        // return reserved_overlaps_.find(&o) != reserved_overlaps_.end();
    };

    ol_store_.Save(fname, overlap_file_type_, filter);
}

void OverlapFilter::FilterSimple() {
    // MMM
    //for (auto o : reserved_overlaps_) {
    //    CheckSimple(*o, ignored);
    //}
    
    for (auto &o : ol_store_.Get()) {
        if (GetOlReason(o).type == OlReason::RS_OK) {
            CheckSimple(o);
        }
    }

    // MMM
    // for (auto o : ignored) {
    //     reserved_overlaps_.erase(o);
    //    filtered_overlaps_.insert(std::make_pair(o, OlReason::Simple()));
    //}
}


void OverlapFilter::CheckSimple(const Overlap &o) {
    if (o.identity_ >= min_identity_ &&  
        o.a_.len >= min_length_ && o.b_.len >= min_length_ && 
        o.a_.len <= max_length_ && o.b_.len <= max_length_ && 
        o.AlignedLength() >= (size_t)min_aligned_length_ &&
        o.Location(max_overhang_) != Overlap::Loc::Abnormal &&
        o.a_.id != o.b_.id) {

        ModifyEnd(o);
    } else {
        SetOlReason(o, OlReason::Simple());
    }
}

bool OverlapFilter::CheckEnd(const Overlap &o) const  {
    return o.Location(max_overhang_) != Overlap::Loc::Abnormal; 
}

// TODO 不修改合法的Overlap
void OverlapFilter::ModifyEnd(const Overlap &oldone) {
    if (oldone.Location(0) != Overlap::Loc::Abnormal) return;
    if (oldone.Location(max_overhang_) == Overlap::Loc::Abnormal) return;

    Overlap& o = const_cast<Overlap&>(oldone);
    if (o.a_.strand == o.b_.strand) {
        if (o.a_.start <= max_overhang_ && o.b_.start <= max_overhang_) {
            o.a_.start = 0;
            o.b_.start = 0;
        }
        else if (o.a_.start <= max_overhang_) {
            o.b_.start -= o.a_.start;
            o.a_.start = 0;
        } 
        else if (o.b_.start <= max_overhang_) {
            o.a_.start -= o.b_.start;
            o.b_.start = 0;
        }

        if (o.a_.end >= o.a_.len - max_overhang_ && o.b_.end >= o.b_.len - max_overhang_) {
            o.a_.end = o.a_.len;
            o.b_.end = o.b_.len;
        }
        else if (o.a_.end >= o.a_.len - max_overhang_) {
            o.b_.end += o.a_.len - o.a_.end;
            o.a_.end = o.a_.len;
        }
        else if (o.b_.end >= o.b_.len - max_overhang_) {
            o.a_.end += o.b_.len - o.b_.end;
            o.b_.end = o.b_.len;
        }

    }
    else {
        if (o.a_.start <= max_overhang_ && o.b_.end >= o.b_.len - max_overhang_) {
            o.a_.start = 0;
            o.b_.end = o.b_.len;
        }
        else if (o.a_.start <= max_overhang_) {
            o.b_.end += o.a_.start;
            o.a_.start = 0;
        }
        else if (o.b_.end >= o.b_.len - max_overhang_) {
            o.a_.start -= o.b_.len - o.b_.end;
            o.b_.end = o.b_.len;
        }

        if (o.b_.start <= max_overhang_ && o.a_.end >= o.a_.len - max_overhang_) {
            o.b_.start = 0;
            o.a_.end = o.a_.len;
        }
        else if (o.b_.start <= max_overhang_) {
            o.a_.end += o.b_.start;
            o.b_.start = 0;

        }
        else if (o.a_.end >= o.a_.len - max_overhang_) {
            o.b_.start -= o.a_.len - o.a_.end;
            o.a_.end = o.a_.len;
        }
    }

    assert(o.Location(0) != Overlap::Loc::Abnormal);

}

void OverlapFilter::FilterContained() {

    std::unordered_map<int, RdReason> ignored;    


    for (auto &o : ol_store_.Get()) {
        if (IsReserved(o)) {
            std::array<int,2> rel;
            if (IsContained(o, rel)) {
                ignored.insert(std::make_pair(rel[0], RdReason::Contained(rel[1])));
            }
        }
    }

    UpdateFilteredRead(ignored);
    filtered_reads_.insert(ignored.begin(), ignored.end());

}


void OverlapFilter::FilterContainedMt() {
    std::mutex mutex;       // Lock filtered_reads_

    auto split_func = [this]() {
        return SplitRange(thread_size_, (size_t)0, ol_store_.Size());
    };

    auto comb_func = [this, &mutex](const std::vector<std::array<int,2>> &rels) {
        std::lock_guard<std::mutex> lock(mutex);
        for (auto &rel : rels) 
            filtered_reads_.insert(std::make_pair(rel[0], RdReason::Contained(rel[1])));
    };

    auto work_func = [&](const std::array<size_t, 2> &range) {
        std::vector<std::array<int,2>> rels;
        
        for (size_t i=range[0]; i<range[1]; ++i) {
            const Overlap& o = ol_store_.Get(i);
            std::array<int, 2> rel;

            if (IsReserved(o) && IsContained(o, rel)) {
                rels.push_back(rel);
            }
        }

        comb_func(rels);
    };

    MultiThreadRun(thread_size_, split_func, work_func);

    UpdateFilteredRead(filtered_reads_);

}


void OverlapFilter::FilterCoverage() {

    for (auto &g : groups_) {
        auto minmax = CalcMinMaxCoverage(g.first, g.second);
        coverages_.insert(std::make_pair(g.first, std::array<int,2>{minmax.first, minmax.second}));
    }
    
       
    coverage_params_ = CoverageParam();
    FilterCoverage(coverage_params_[0], coverage_params_[1], coverage_params_[2]);
}


void OverlapFilter::FilterCoverageMt() {
    auto work_func = [&](const std::vector<int>& input) -> std::unordered_map<int, std::array<int, 2>> {
        std::unordered_map<int, std::array<int, 2>> output;
        
        for (auto i : input) {
            auto minmax = CalcMinMaxCoverage(i, groups_[i]);
            output.insert(std::make_pair(i, std::array<int,2>{minmax.first, minmax.second}));
        }
        return output;
    };

    coverages_ = MultiThreadRun(thread_size_, groups_, 
        SplitMapKeys<decltype(groups_)>, 
        work_func, 
        MoveCombineMapOrSet<std::unordered_map<int, std::array<int, 2>>>);   
    coverage_params_ = CoverageParam1();
    FilterCoverage(coverage_params_[0], coverage_params_[1], coverage_params_[2]);
}

void OverlapFilter::FilterLackOfSupport() {

    std::unordered_set<const Overlap*> ignored;
    for (const auto &o : ol_store_.Get()) {
        if (IsReserved(o) && !HasSupport(o, std::max(0, int(coverage_params_[0]-1)))) {
            ignored.insert(&o);
        }
    }
   for (auto o : ignored) {
       SetOlReason(*o, OlReason::LackOfSupport());
   }
}

void OverlapFilter::FilterLackOfSupportMt() {
    auto split_func = [this](size_t size, const std::array<size_t,2> &range) {
        return SplitRange(thread_size_, range[0], range[1]);
    };

    auto work_func = [&](const std::array<size_t, 2> &range)->std::unordered_set<const Overlap*> {
        
        std::unordered_set<const Overlap*> ignored;
        for (size_t i=range[0]; i<range[1]; ++i) {
            const Overlap& o = ol_store_.Get(i);
            if (IsReserved(o) && !HasSupport(o, std::max(0, int(coverage_params_[0]-1)))) {
                ignored.insert(&o);
            }
        }
        return ignored;
    };

   auto ignored = MultiThreadRun((size_t)thread_size_, std::array<size_t,2>{(size_t)0, ol_store_.Size()}, split_func, work_func, MoveCombineMapOrSet<std::unordered_set<const Overlap*>>);
   for (auto o : ignored) {
       SetOlReason(*o, OlReason::LackOfSupport());
   }
}

void OverlapFilter::FilterLocal() {
    for (auto &g : groups_) {
        FilterLocalStep(g.first, g.second);
    }
    
}

void OverlapFilter::FilterLocalMt() {
    auto work_func = [&](const std::vector<Seq::Id>& input)  {
        
        for (auto i : input) {
            FilterLocalStep(i, groups_[i]);
        }
    };

    MultiThreadRun(thread_size_, groups_, 
        SplitMapKeys<decltype(groups_)>, 
        work_func);   
}

void OverlapFilter::FilterLocalStep(Seq::Id id, const std::unordered_map<Seq::Id, const Overlap*>& g) {
    std::vector<std::array<double,2>> identities;
    std::vector<std::array<double,2>> overhangs;       // TODO Is it better to distiguish between 3' and 5' 

    for (auto &i : g) {
        const Overlap &o = *i.second;
        if (IsReserved(o)) {
            identities.push_back(std::array<double,2>{o.identity_, o.identity_*o.AlignedLength() / 1000.0});

            auto loc = o.Location(max_overhang_);
            assert(loc != Overlap::Loc::Abnormal);
            auto oh = o.Overhang();
            if (id == o.a_.id) {
                if (oh[0] >= 0) overhangs.push_back(std::array<double,2>{double(oh[0]), double(o.AlignedLength())});
            } else {
                assert(id == o.b_.id);
                if (oh[1] >= 0) overhangs.push_back(std::array<double,2>{double(oh[1]), double(o.AlignedLength())});
            }
        }
    }

    if (identities.size() > 0) {
        double median = 0;
        double mad = 0;
        ComputeMedianAbsoluteDeviation(identities, median, mad);
        double th = min_identity_median_ > 0 ? std::min(median - 6*1.4826*mad, min_identity_median_) : min_identity_;
        for (auto &i : g) {
            const Overlap &o = *i.second;
            if (IsReserved(o) && o.identity_ < th) {                
                SetOlReason(o, OlReason::Local(0, (int)th*100));
            }
        }
    }

    if (overhangs.size() > 0) {
        double median = 0;
        double mad = 0;
        ComputeMedianAbsoluteDeviation(overhangs, median, mad);
        double th = max_overhang_median_ > 0 ? std::max(median + 6*1.4826*mad, max_overhang_median_) : max_overhang_;
        //printf("th,median,mad: %f, %f, %f\n", th, median, mad);
        for (auto &i : g) {
            const Overlap &o = *i.second;      
            if (IsReserved(o)) {       
                auto loc = o.Location(max_overhang_);
                assert(loc != Overlap::Loc::Abnormal);
                auto ol = o.Overhang();

                if (id == o.a_.id ) {
                    if (ol[0] >= 0 && ol[0] > th)  {
                        //printf("%d %s", id, o.ToM4Line().c_str());
                        SetOlReason(o, OlReason::Local(1, th));
                    }
                } else {
                    assert(id == o.b_.id);
                    if (ol[1] >= 0 && ol[1] > th)  {
                        //printf("%d %s", id, o.ToM4Line().c_str());
                        SetOlReason(o, OlReason::Local(1, th));
                    }
                }
                            
            }
        }

    }    
    
    for (auto &i : g) {
        const Overlap &o = *i.second;
        if (IsReserved(o)) {
            ModifyEnd(o);
	    }
    }

}

void OverlapFilter::FilterLongest() {
    std::vector<std::array<int, 2>> lengths;
    std::unordered_set<int> done;

    // TODO Two step can be combined to one
    for (const auto &o : ol_store_.Get()) {
        if (IsReserved(o)) {
            if (done.find(o.a_.id) == done.end()) {
                lengths.push_back({o.a_.id, o.a_.len});
            }
            if (done.find(o.b_.id) == done.end()) {
                lengths.push_back({o.b_.id, o.b_.len});
            }
            done.insert(o.a_.id);
            done.insert(o.b_.id);
        }
    }

    size_t index = FindLongestXHeap(lengths, (long long)genome_size_*coverage_);
    for (size_t i=index; i < lengths.size(); ++i) {
        filtered_reads_.insert(std::make_pair(lengths[i][0], RdReason::NoLongest()));
    }

    UpdateFilteredRead(filtered_reads_);
}

void OverlapFilter::FilterBestN() {
    std::unordered_set<const Overlap*> keep;
    for (auto &g : groups_) {
        auto k = FindBestN(g);
        keep.insert(k.begin(), k.end());
        
    }

    for (const auto &o : ol_store_.Get()) {
        if (IsReserved(o) && keep.find(&o) == keep.end()) {
            SetOlReason(o, OlReason::BestN());
        }

    }
}


void OverlapFilter::FilterBestNMt() {
    auto work_func = [&](const std::vector<int> &input) {
        
        std::unordered_set<const Overlap*> output;
        for (auto &i : input) {
            if (filtered_reads_.find(i) == filtered_reads_.end()) {
                auto g = groups_.find(i);
                assert(g != groups_.end());
                auto k = FindBestN(*g);

                output.insert(k.begin(), k.end());
            }
            
        }
        return output;
    };


    auto keep = MultiThreadRun(thread_size_, groups_, 
        SplitMapKeys<decltype(groups_)>, 
        work_func, 
        MoveCombineMapOrSet<std::unordered_set<const Overlap*>>);

    for (const auto &o : ol_store_.Get()) {
        if (IsReserved(o) && keep.find(&o) == keep.end()) {
            SetOlReason(o, OlReason::BestN());
        }

    }

}


void OverlapFilter::AutoSelectParams() {
    std::mutex mutex;

    size_t block_size = 50000;

    std::unordered_map<int, ReadStatInfo> readInfos;
    
    struct WorkArea{
        std::unordered_map<int, ReadStatInfo> readInfos;
        void Clear() { readInfos.clear(); }
    };
    

    std::list<WorkArea> works;  // for each thread. Vector may cause memory reallocating, so list is used.

    auto alloc_work = [&]() -> WorkArea& {
        std::lock_guard<std::mutex> lock(mutex);
        works.push_back(WorkArea());

        return works.back();
    };

    auto combine = [&](WorkArea& work) {
        std::lock_guard<std::mutex> lock(mutex);
        for (const auto &i : work.readInfos) {
            auto iter = readInfos.find(i.first); 
            if (iter != readInfos.end()) {
                if (iter->second.score < i.second.score) {
                    iter->second.score = i.second.score;
                    iter->second.identity = i.second.identity;
                }
                assert(iter->second.len >= i.second.len);

                iter->second.count += i.second.count;
                iter->second.all_identity += i.second.all_identity;
                iter->second.all_score += i.second.all_score;
                iter->second.aligned += i.second.aligned;
                if (i.second.overhang >= 0) {
                    if (iter->second.overhang < i.second.overhang) iter->second.overhang = i.second.overhang;
                    iter->second.oh_count += i.second.oh_count;
                }
    
            } else {
                readInfos[i.first] = i.second;

            }
        }

        work.Clear();
    };

    auto scan_overlap = [&](Overlap& o) {
        WorkArea thread_local &work = alloc_work();
        auto loc = o.Location(1000);
        if (o.identity_ > 70 && o.AlignedLength() >= 0 && 
            loc != Overlap::Loc::Abnormal) {

            auto overhang = o.Overhang();
            int score = o.identity_*o.AlignedLength();

            auto aread = work.readInfos.find(o.a_.id);
            auto bread = work.readInfos.find(o.b_.id);

            if (aread != work.readInfos.end()) {
                if (aread->second.score < score) {
                    aread->second.score = score;
                    aread->second.identity = o.identity_;
                }
                assert(aread->second.len == o.a_.len);
                if (overhang[0] >= 0) {
                    if (overhang[0] > aread->second.overhang)  aread->second.overhang = overhang[0];   
                    aread->second.oh_count += 1;
                }
                aread->second.aligned += o.a_.end - o.a_.start;
                aread->second.all_identity += o.identity_;
                aread->second.all_score += score ;
                aread->second.count += 1;
            } else {
                ReadStatInfo info;
                info.identity = o.identity_;
                if (overhang[0] >= 0) {
                    info.overhang = overhang[0];
                    info.oh_count = 1;
                }
                info.score = score;
                info.len = o.a_.len;
                info.aligned = o.a_.end - o.a_.start;
                info.count = 1;
                info.all_identity = o.identity_ ;
                info.all_score =  score;
                work.readInfos[o.a_.id] = info;
            }

            if (bread != work.readInfos.end()) {
                if (bread->second.score < score) {
                    bread->second.score = score;
                    bread->second.identity = o.identity_;
                }
                assert(bread->second.len == o.b_.len);
                if (overhang[1] >= 0) {
                    if (overhang[1] > bread->second.overhang)  bread->second.overhang = overhang[1];   
                    bread->second.oh_count += 1;
                }
                bread->second.aligned += o.b_.end - o.b_.start;
                bread->second.all_identity += o.identity_ ;
                bread->second.all_score += score ;
                bread->second.count += 1;
            } else {
                ReadStatInfo info;
                info.identity = o.identity_;
                if (overhang[1] >= 0) {
                    info.overhang = overhang[1];
                    info.oh_count = 1;
                }
                info.score = score;
                info.len = o.b_.len;
                info.aligned = o.b_.end - o.b_.start;
                info.count = 1;
                info.all_identity = o.identity_;
                info.all_score =  score;
                work.readInfos[o.b_.id] = info;
            }

        }
        if (work.readInfos.size() >= block_size) {
            combine(work);
        }
        return false;   // not load to memory
    };

    OverlapStore ol;
    ol.Load(ifname_, "", thread_size_, scan_overlap);
    for (auto & w : works) {
        combine(w);
    }

    // for debug, print stat info
    //for (auto o : readInfos) {
    //    printf("id %d %f\n", o.first, o.second.identity);
    //}

    if (min_length_ < 0 || (genome_size_ > 0 && coverage_ > 0)) {
        AutoSelectMinLength(readInfos);
    }

    if (min_identity_ < 0) {
        AutoSelectMinIdentity(readInfos);
    }

    if (max_overhang_ < 0) {
        AutoSelectMaxOverhang(readInfos);
    }

    if (min_aligned_length_ < 0) {
        AutoSelectMinAlignedLength(readInfos);
    }
}

void OverlapFilter::AutoSelectMinLength(const std::unordered_map<Seq::Id, ReadStatInfo> &readInfos) {

    std::vector<int> lengths(readInfos.size());
    std::transform(readInfos.begin(), readInfos.end(), lengths.begin(), [](const std::pair<int,ReadStatInfo> &kv) {
        return kv.second.len;
    });

    int longx = -1;
    if (genome_size_ > 0 && coverage_ > 0) {
        size_t i = FindLongestXHeap(lengths, genome_size_*coverage_);
        if (i < lengths.size()) longx = lengths[i];
    }

    if (longx > 0 && longx > min_length_) {
        min_length_ = longx;
        LOG(INFO)("Auto Select min_length = %d to keep genome_size(%lld)*coverage(%d) bases", min_length_, genome_size_, coverage_);
    }

    if (min_length_ < 0) {
        min_length_ = 2500; 
        LOG(INFO)("Auto Select min_length = %d, a fixed value", min_length_);
    }
}

void OverlapFilter::AutoSelectMinIdentity(const std::unordered_map<Seq::Id, ReadStatInfo> &readInfos) {
    std::vector<std::array<double,2>> idents(readInfos.size());
    std::transform(readInfos.begin(), readInfos.end(), idents.begin(), [&](const std::pair<int,ReadStatInfo> &kv) {
        //return std::array<double,2>{kv.second.identity,(double)kv.second.score/1000}; // x/1000 is to prevent overflow
        return std::array<double,2>{kv.second.all_identity / kv.second.count, (double)kv.second.len/1000}; // x/1000 is to prevent overflow
    });
    //std::vector<std::array<double,2>> idents;
    //for (const auto& ri : readInfos) {
    //    if (ri.second.identity < 99.9) {
    //         idents.push_back(std::array<double,2>({ri.second.identity, ri.second.score/1000.0}));
    //    }
    //}
    double median = 0;
    double mad = 0;
    ComputeMedianAbsoluteDeviation(idents, median, mad);
    min_identity_ = median - 6*1.4826*mad;
    LOG(INFO)("Auto Select min_identity = %.02f, median=%f, mad=%f", min_identity_, median, mad);
}

void OverlapFilter::AutoSelectMaxOverhang(const std::unordered_map<Seq::Id, ReadStatInfo> &readInfos) {
    std::vector<std::array<double,2>> overhangs(readInfos.size());
    std::transform(readInfos.begin(), readInfos.end(), overhangs.begin(), [&](const std::pair<int,ReadStatInfo> &kv) {
        return std::array<double,2>{kv.second.overhang*1.0, kv.second.len/100.0};
    });

    //std::vector<std::array<double,2>> overhangs; 
    //for (const auto& ri : readInfos) {
    //    if (ri.second.overhang > 0) {
    //         overhangs.push_back(std::array<double,2>({ri.second.overhang*1.0, ri.second.len/100.0}));
    //    }
    //}
    double median = 0;
    double mad = 0;
    ComputeMedianAbsoluteDeviation(overhangs, median, mad);
    max_overhang_ = (int)(median + 6*1.4826*mad);
    LOG(INFO)("Auto Select max_overhang = %d, median=%f, mad=%f", max_overhang_, median, mad);

}

void OverlapFilter::AutoSelectMinAlignedLength(const std::unordered_map<Seq::Id, ReadStatInfo> &readInfos) {
    //std::vector<std::array<double,2>> lens(readInfos.size());
    //std::transform(readInfos.begin(), readInfos.end(), lens.begin(), [&](const std::pair<int,ReadStatInfo> &kv) {
    //    return std::array<double,2>{kv.second.aligned*1.0 / kv.second.count / kv.second.len, 1.0};
    //});

    //if (min_aligned_length_ < 0) {
    //    double median = 0;
    //    double mad = 0;
    //    ComputeMedianAbsoluteDeviation(lens, median, mad);
    //    min_aligned_length_ = (int)(median - 6*1.4826*mad) * min_length_;
    //    LOG(INFO)("Auto Select min_aligned_length = %d, median=%f, mad=%f", min_aligned_length_, median, mad);
    //}

    if (min_aligned_length_ < 0) {
        min_aligned_length_ = 2000; 
        LOG(INFO)("Auto Select min_aligned_length = %d, a fixed value", min_aligned_length_);
    }
}

void OverlapFilter::GroupAndFilterDuplicate() {
    for (const auto &o : ol_store_.Get()) {
        if (IsReserved(o)) {

            auto add_overlap = [&](int a, int b, const Overlap& o) {
                auto iter = groups_.find(a);
                if (iter != groups_.end()) {
                    auto iteriter = iter->second.find(b);
                    if (iteriter == iter->second.end()) {
                        iter->second[b] = &o;
                    } else {
                        if (BetterAlignedLength(o, *(iteriter->second))) {
                            SetOlReason(*(iteriter->second), OlReason::Duplicate());
                            iteriter->second = &o;
                        } else {
                            SetOlReason(o, OlReason::Duplicate());

                        }
                    }
                }
                else {
                    auto iter = groups_.insert(std::make_pair(a, std::unordered_map<int, const Overlap*>()));
                    iter.first->second[b] = &o;
                }
            };

            add_overlap(o.a_.id, o.b_.id, o);
            add_overlap(o.b_.id, o.a_.id, o);
        }
    }
}

void OverlapFilter::GroupAndFilterDuplicateMt() {
    std::vector<std::pair<const Overlap*, OlReason>> ignored;
    std::mutex mutex;

    auto add_overlap = [](int low, int a, int b, const Overlap& o, std::vector<std::unordered_map<Seq::Id, const Overlap*>>& group) {
        
        auto it = group[a-low].find(b);
        if (it == group[a-low].end()) {
            group[a-low][b] = &o;
        } else {
            if (BetterAlignedLength(o, *(it->second))) {
                SetOlReason(*(it->second), OlReason::Duplicate());
                it->second = &o;
            } else {
                SetOlReason(o, OlReason::Duplicate());
            }
        }

    };

    auto split_func = [this]() {
        auto r = ol_store_.GetReadIdRange();
        return SplitRange(thread_size_, r[0], r[1]);
    };
    auto comb_func = [this, &ignored, &mutex](int low, std::vector<std::unordered_map<Seq::Id, const Overlap*>>&& group) {
        std::lock_guard<std::mutex> lock(mutex);
        for (size_t i=0; i<group.size(); ++i) {
            if (group[i].size() > 0) {
                groups_[low+(int)i] = std::move(group[i]);
            }

        }
    };

    auto work_func = [this, add_overlap, comb_func](std::array<Seq::Id, 2> r) {
        std::vector<std::unordered_map<Seq::Id, const Overlap*>> group(r[1] - r[0]);
        std::vector<std::pair<const Overlap*, OlReason>> ignored;

        for (const auto &o : ol_store_.Get()) {
            if (IsReserved(o)) {
                if (o.a_.id >= r[0] && o.a_.id < r[1]) {
                    add_overlap(r[0], o.a_.id, o.b_.id, o, group);
                }
                if (o.b_.id >= r[0] && o.b_.id < r[1]) {
                    add_overlap(r[0], o.b_.id, o.a_.id, o, group);
                }
            }
        }

        comb_func(r[0], std::move(group));
    };

    MultiThreadRun((int)thread_size_, split_func, work_func);

}


std::pair<int, int> OverlapFilter::CalcMinMaxCoverage(int id, const std::unordered_map<int, const Overlap*>& group) {
    if (group.size() > 0) {
        std::vector<int> cov((group.begin()->second->a_.id == id ? group.begin()->second->a_.len :
            group.begin()->second->b_.len) + 1, 0);

        for (const auto &ig : group) {
            const Overlap& o = *ig.second;
            if (IsReserved(o)) {
                if (o.a_.id == id) {
                    cov[o.a_.start] ++;
                    cov[o.a_.end] --;
                } else {
                    assert(o.b_.id == id);
                    cov[o.b_.start] ++;
                    cov[o.b_.end] --;

                }
            }
        }
        for (size_t i = 1; i < cov.size(); ++i) {
            cov[i] += cov[i - 1];
        }
        assert(cov.back() == 0);

        auto c_minmax = std::minmax_element(cov.begin(), cov.end()-1);
        return std::make_pair(*c_minmax.first, *c_minmax.second);

    }
    else {
        return std::make_pair(0, 0);
    }


}

void OverlapFilter::PrintArguments() {
    LOG(INFO)("Arguments: \n%s", GetArgumentParser().PrintOptions().c_str());
}


void OverlapFilter::FilterCoverage(int min_coverage, int max_coverage, int max_diff_coverge) {
    LOG(INFO)("min_coverage = %d, max_coverage = %d, max_diff_coverage = %d", min_coverage, max_coverage, max_diff_coverge);

    for (auto c : coverages_) {
        if (c.second[0] < min_coverage  || c.second[1] > max_coverage ||
            c.second[1] - c.second[0] > max_diff_coverge) {
            filtered_reads_.insert(std::make_pair(c.first, RdReason::Coverage(c.second)));
        }
    }

    UpdateFilteredRead(filtered_reads_);
}

std::array<int, 3> OverlapFilter::CoverageParam() const {
    std::array<int, 3> param { min_coverage_, max_coverage_, max_diff_coverage_};

    if (param[0] < 0 && coverages_.size() > 0) {
        auto minmaxv = std::minmax_element(coverages_.begin(), coverages_.end(), 
        [](const std::pair<const int, std::array<int,2>> &a, 
           const std::pair<const int, std::array<int,2>> &b) {
            return a.second[0] < b.second[0];
        });

        auto minv = (*minmaxv.first).second[0];
        auto maxv = (*minmaxv.second).second[0];

        std::vector<int> counts(maxv-minv+1, 0);
        for (auto c : coverages_) {
            counts[c.second[0]-minv]++;
        }

        
        int accu = 0;
        for (size_t i=0; i<counts.size(); ++i) {
            accu += counts[i];
            if (coverages_.size() * coverage_discard_ / 100 < accu) {
                param[0] = i + minv;
                break;
            }
        }
        

    }

    if (param[1] < 0 && coverages_.size() > 0) {
        auto minmaxv = std::minmax_element(coverages_.begin(), coverages_.end(), 
        [](const std::pair<const int, std::array<int,2>> &a, 
           const std::pair<const int, std::array<int,2>> &b) {
            return a.second[1] < b.second[1];
        });

        auto minv = (*minmaxv.first).second[1];
        auto maxv = (*minmaxv.second).second[1];

        std::vector<int> counts(maxv-minv+1, 0);
        for (auto c : coverages_) {
            counts[c.second[1]-minv]++;
        }

        int accu = 0;
        for (size_t i=counts.size(); i>0; --i) {
            accu += counts[i-1];
            if (coverages_.size() * coverage_discard_ / 100< accu) {
                param[1] = i-1 + minv;
                break;
            }
        }
    }

    if (param[2] < 0 && coverages_.size() > 0) {
        auto minmaxv = std::minmax_element(coverages_.begin(), coverages_.end(), 
        [](const std::pair<const int, std::array<int,2>> &a, 
           const std::pair<const int, std::array<int,2>> &b) {
            return a.second[1]-a.second[0] < b.second[1]-b.second[0];
        });

        auto minv = (*minmaxv.first).second[1]-(*minmaxv.first).second[0];
        auto maxv = (*minmaxv.second).second[1]-(*minmaxv.second).second[0];

        std::vector<int> counts(maxv-minv+1, 0);
        for (auto c : coverages_) {
            counts[c.second[1]-c.second[0]-minv]++;
        }

        int accu = 0;
        for (size_t i=counts.size(); i>0; --i) {
            accu += counts[i-1];
            if (coverages_.size() * coverage_discard_ / 100 < accu) {
                param[2] = i-1 + minv;
                break;
            }
        }
    }

    return param;
}


std::array<int, 3> OverlapFilter::CoverageParam1() const {
    std::array<int, 3> param { min_coverage_, max_coverage_, max_diff_coverage_};

    std::vector<int> cov(coverages_.size());
    if (param[0] < 0 && coverages_.size() > 0) {
        std::transform(coverages_.begin(), coverages_.end(), cov.begin(), [](const decltype(coverages_)::value_type& a) {
            return a.second[0];
        });

        param[0] = FirstTrough(cov, 100, 9);
        //param[0] = Percentile(cov, coverage_percent_);
    }


    if (param[1] < 0 && coverages_.size() > 0) {
        std::transform(coverages_.begin(), coverages_.end(), cov.begin(), [](const decltype(coverages_)::value_type& a) {
            return a.second[1];
        });

        param[1] = Percentile(cov, 100-coverage_discard_);
    }

    

    if (param[2] < 0 && coverages_.size() > 0) {
        std::transform(coverages_.begin(), coverages_.end(), cov.begin(), [](const decltype(coverages_)::value_type& a) {
            return a.second[1]-a.second[0];
        });

        param[2] = Percentile(cov, 100-coverage_discard_);
    }

    return param;
}

size_t OverlapFilter::FindLongestXHeap(std::vector<std::array<int,2>> &lengths, long long goal) {

    long long accu = 0;
    size_t index = 0;
    auto cmp = [](const std::array<int, 2>&a, const std::array<int,2> &b) {
        return a[1] > b[1];
    };
    for (size_t i = 0; i< lengths.size(); ++i) {
        if (accu < goal || lengths[0][1] < lengths[i][1]) {
            accu += lengths[i][1];
            std::swap(lengths[index], lengths[i]);
            index++;
            std::push_heap(lengths.begin(), lengths.begin()+index, cmp);
        }

        while (accu - lengths[0][1] >= goal) {
            accu -= lengths[0][1];
            std::pop_heap(lengths.begin(), lengths.begin()+index, cmp);
            index--;
        
        }
    }

    return index;
}

size_t OverlapFilter::FindLongestXHeap(std::vector<int> &lengths, long long goal) {

    long long accu = 0;
    size_t index = 0;
    auto cmp = [](int a, int b) {
        return a > b;
    };
    for (size_t i = 0; i< lengths.size(); ++i) {
        if (accu < goal || lengths[0] < lengths[i]) {
            accu += lengths[i];
            std::swap(lengths[index], lengths[i]);
            index++;
            std::push_heap(lengths.begin(), lengths.begin()+index, cmp);
        }

        while (accu - lengths[0] >= goal) {
            accu -= lengths[0];
            std::pop_heap(lengths.begin(), lengths.begin()+index, cmp);
            index--;
        
        }
    }

    return index;
}

size_t OverlapFilter::FindLongestXSort(std::vector<std::array<int,2>> &lengths, long long goal) {

    long long accu = 0;
    auto cmp = [](const std::array<int, 2>&a, const std::array<int,2> &b) {
        return a[1] > b[1];
    };

    std::sort(lengths.begin(), lengths.end(), cmp);

    for (size_t i = 0; i< lengths.size(); ++i) {
        if (accu < goal) {
            accu += lengths[i][1];
        } else {
            return i;
        }
    }

    return lengths.size();
}

bool OverlapFilter::HasSupport(const Overlap &o, int count) const {

    bool r = false;
    Overlap::Loc loc = o.Location(0);  // Because modified the Ends, err could be set 0
    
    const int maxoh = 2000; // 2*max_overhang_;
    int a_head = o.a_.start;
    int a_tail = o.a_.len - o.a_.end;
    int b_head = o.b_.start;
    int b_tail = o.b_.len - o.b_.end;
    if (o.SameDirect()) {
        
        if (loc == Overlap::Loc::Left) {
            r = HasAlignment(o.a_.id, o.b_.id, 1, count, b_tail > maxoh) && HasAlignment(o.b_.id, o.a_.id, 0, count, a_head > maxoh);
        } else if (loc == Overlap::Loc::Right) {
            r = HasAlignment(o.a_.id, o.b_.id, 0, count, b_head > maxoh) && HasAlignment(o.b_.id, o.a_.id, 1, count, a_tail > maxoh);
        } else if (loc == Overlap::Loc::Containing) {
            r = HasAlignment(o.b_.id, o.a_.id, 0, count, a_head > maxoh) && HasAlignment(o.b_.id, o.a_.id, 1, count, a_tail > maxoh);
        } else if (loc == Overlap::Loc::Contained) {
            r = HasAlignment(o.a_.id, o.b_.id, 0, count, b_head > maxoh) && HasAlignment(o.a_.id, o.b_.id, 1, count, b_tail > maxoh);
        } else if (loc == Overlap::Loc::Equal) {
            r = HasAlignment(o.a_.id, o.b_.id, 0, count, false) && HasAlignment(o.a_.id, o.b_.id, 1, count, false) &&
                HasAlignment(o.b_.id, o.a_.id, 0, count, false) && HasAlignment(o.b_.id, o.a_.id, 1, count, false);
        } else {
            assert("Never Come here");
        }

    } else {
        if (loc == Overlap::Loc::Left) {
            r = HasAlignment(o.a_.id, o.b_.id, 0, count, b_tail > maxoh) && HasAlignment(o.b_.id, o.a_.id, 0, count, a_tail > maxoh);
        } else if (loc == Overlap::Loc::Right) {
            r = HasAlignment(o.a_.id, o.b_.id, 1, count, b_head > maxoh) && HasAlignment(o.b_.id, o.a_.id, 1, count, a_head > maxoh);
        } else if (loc == Overlap::Loc::Containing) {
            r = HasAlignment(o.b_.id, o.a_.id, 0, count, a_tail > maxoh) && HasAlignment(o.b_.id, o.a_.id, 1, count, a_head > maxoh);
        } else if (loc == Overlap::Loc::Contained) {
            r = HasAlignment(o.a_.id, o.b_.id, 0, count, b_tail > maxoh) && HasAlignment(o.a_.id, o.b_.id, 1, count, b_head > maxoh);
        } else if (loc == Overlap::Loc::Equal) {
            r = HasAlignment(o.a_.id, o.b_.id, 0, count, false) && HasAlignment(o.a_.id, o.b_.id, 1, count, false) &&
                HasAlignment(o.b_.id, o.a_.id, 0, count, false) && HasAlignment(o.b_.id, o.a_.id, 1, count, false);
        } else {
            assert("Never Come here");
        }
    }
    
    return r;
}

bool OverlapFilter::HasAlignment(int a, int b, int end, int count, bool exceeding) const {
    auto agroup = groups_.find(a);
    auto bgroup = groups_.find(b);

    assert(agroup != groups_.end() && bgroup != groups_.end());
    int at0 = 0;
    int at1 = 0;
    int reserved_at0 = 0;
    int reserved_at1 = 0;
    for (const auto &i : agroup->second) {
        //if (i.first == b || !IsReserved(*i.second)) continue; // skip self
        if (i.first == b ) continue; // skip self

        if (end == 0) {
            bool qualified = false;
         
            if (exceeding) {
                if (i.second->SameDirect()) {
                    qualified = i.second->a_.id == a ? i.second->b_.start > max_overhang_ 
                                                     : i.second->a_.start > max_overhang_;
                } else {
                    qualified = i.second->a_.id == a ? i.second->b_.len - i.second->b_.end > max_overhang_ 
                                                     : i.second->a_.len - i.second->a_.end > max_overhang_;
                }
            } else {
                qualified = i.second->a_.id == a ? i.second->a_.start <= max_overhang_ 
                                                 : i.second->b_.start <= max_overhang_;
            }
            auto inbgroup = bgroup->second.find(i.first);
            //if ( qualified && inbgroup != bgroup->second.end() && IsReserved(*(inbgroup->second))) {
            if ( qualified && inbgroup != bgroup->second.end()) {
                at0 ++;
                if (IsReserved(*(inbgroup->second)) && IsReserved(*i.second)) reserved_at0 ++;
                if (at0 >= count && reserved_at0 >= 1) return true;
            }
        } else {
            assert (end == 1);
            bool qualified = false;
         
            if (exceeding) {
                if (i.second->SameDirect()) {
                    qualified = i.second->a_.id == a ? i.second->b_.len - i.second->b_.end > max_overhang_ 
                                                     : i.second->a_.len - i.second->a_.end > max_overhang_;
                } else {
                    qualified = i.second->a_.id == a ? i.second->b_.start > max_overhang_ 
                                                     : i.second->a_.start > max_overhang_;
                }
            } else {
                qualified = i.second->a_.id == a ? i.second->a_.len - i.second->a_.end <= max_overhang_ 
                                                 : i.second->b_.len - i.second->b_.end <= max_overhang_;
            }
            auto inbgroup = bgroup->second.find(i.first);
            //if ( qualified && inbgroup != bgroup->second.end() && IsReserved(*(inbgroup->second))) {
            if ( qualified && inbgroup != bgroup->second.end()) {
                at1 ++;
                if (IsReserved(*(inbgroup->second)) && IsReserved(*i.second)) reserved_at1 ++; 
                if (at1 >= count && reserved_at1 >= 1) return true;
            }
        }
    }

    return false;
}

std::unordered_set<const Overlap*> OverlapFilter::FindBestN(const std::pair<int, std::unordered_map<int, const Overlap*>> &g) const {
    std::unordered_set<const Overlap*> keep;
    std::vector<const Overlap*> left, right;
    for (auto &i : g.second) {
        const Overlap& o = *(i.second);

        if (IsReserved(o)) {
            auto loc = o.Location(i.first, 0);
            if (loc == Overlap::Loc::Left) {
                left.push_back(&o);
            } else {
                assert(loc == Overlap::Loc::Right);
                right.push_back(&o);
            }
            
        }
    }

    if (left.size() > (size_t)bestn_) {
        std::sort(left.begin(), left.end(), [](const Overlap* a, const Overlap *b){ return BetterAlignedLength(*a, *b); });

        keep.insert(left.begin(), left.begin() + bestn_);
    }
    else {
        keep.insert(left.begin(), left.end());
    }
    if (right.size() > (size_t)bestn_) {
        std::sort(right.begin(), right.end(), [](const Overlap* a, const Overlap *b) { return BetterAlignedLength(*a, *b);} );

        keep.insert(right.begin(), right.begin() + bestn_);

    }
    else {
        keep.insert(right.begin(), right.end());
    }

    return keep;
}

bool OverlapFilter::IsContained(const Overlap& o, std::array<int, 2> &rel) {
    auto loc = o.Location(0);
    if (loc == Overlap::Loc::Contained || loc == Overlap::Loc::Containing || loc == Overlap::Loc::Equal) {
        //  contained = rel[0], contain = rel[1]

        if (loc == Overlap::Loc::Equal) {
            rel[0] = std::max(o.a_.id, o.b_.id);
            rel[1] = std::min(o.a_.id, o.b_.id);
        }
        else if (loc == Overlap::Loc::Contained) {
            rel[0] = o.a_.id;
            rel[1] = o.b_.id;
        }
        else {
            assert(loc == Overlap::Loc::Containing);
            rel[0] = o.b_.id;
            rel[1] = o.a_.id;
        }

        return true;

    } else {
        return false;
    }

}

void OverlapFilter::UpdateFilteredRead(const std::unordered_map<Seq::Id, RdReason> &ignored) {

    for (const auto &o : ol_store_.Get()) {
        if (IsReserved(o)) {
            auto it = ignored.find(o.a_.id);
            if (it == ignored.end()) it = ignored.find(o.b_.id);

            if (it != ignored.end()) {
                SetOlReason(o, OlReason::FilteredRead(it->first));
            }
        }
    }
}

void OverlapFilter::Dump() const {
    DumpCoverage(OutputPath(coverage_fname_));
    DumpFilteredReads(OutputPath(filtered_read_fname));
    DumpFilteredOverlaps(OutputPath(filtered_overlap_fname));
    ol_store_.GetReadStore().SaveIdToName(OutputPath("filter_id2name.txt"));
}

void OverlapFilter::DumpCoverage(const std::string &fname) const {
    std::ofstream of(fname);
    if (of.is_open()) {
        for (auto c : coverages_) {
            of << c.first << " " << c.second[0] << " " << c.second[1] << " " <<  c.second[1] -  c.second[0] << "\n";
        }

    } else {
        LOG(ERROR)("Fail to open coverage file %s", fname.c_str());
    }
}

void OverlapFilter::DumpFilteredReads(const std::string &fname) const {
    
    std::unordered_map<RdReason::Type, std::string, std::hash<int>> type_strs = {
        { RdReason::RS_CONTAINED, "Contained" },
        { RdReason::RS_COVERAGE, "Coverage" },
        { RdReason::RS_NO_LONGEST, "NoLongest" },
    };

    std::ofstream of(fname);

    if (of.is_open()) {
        for (auto r : filtered_reads_) {
            of << r.first << " " << type_strs[r.second.type] << " " << r.second.sub[0] << " " 
               << r.second.sub[1] << "\n";
        }

    } else {
        LOG(ERROR)("Fail to open filterd reads file %s", fname.c_str());
    }
}

void OverlapFilter::DumpFilteredOverlaps(const std::string &fname) const {
    std::unordered_map<OlReason::Type, std::string, std::hash<int>> type_strs = {
        { OlReason::RS_SIMPLE, "Simple" },
        { OlReason::RS_DUPLICATE, "Duplicate" },
        { OlReason::RS_BESTN, "BestN" },
        { OlReason::RS_FILTERED_READ, "FilteredRead" },
        { OlReason::RS_LACK_OF_SUPPORT, "LackOfSupport" },
        { OlReason::RS_LOCAL, "Local" },
        { OlReason::RS_UNKNOWN, "Unknown" },
    };

    std::ofstream of(fname);

    if (of.is_open()) {
        for (const auto &o : ol_store_.Get()) {
            OlReason rs = GetOlReason(o);
            if (rs.type != OlReason::RS_OK) {
                of << o.a_.id + 1<< " " << o.b_.id + 1<< " " << type_strs[rs.type] << " " << rs.sub[0] << " "  // TODO id到名字的转换
                << rs.sub[1] << "\n";

            }
        }
    } else {
        LOG(ERROR)("Fail to open filterd reads file %s", fname.c_str());
    }
}


void OverlapFilter::SetOlReason(const Overlap &o, OlReason rs) {
    o.attached = ((long long)(rs.type) << 32) + rs.sub[0];
    assert(rs.sub[1] == 0);
}

OverlapFilter::OlReason OverlapFilter::GetOlReason(const Overlap &o) {
    OlReason rs; 
    rs.type = (OlReason::Type)(o.attached >> 32);
    assert(rs.type >= OlReason::RS_OK && rs.type <= OlReason::RS_UNKNOWN);

    rs.sub[0] = o.attached & 0xFFFFFFFF;
    return rs;
}


bool OverlapFilter::ParamToGenomeSize(const std::string& str, long long *v) {
    
    //std::regex pattern("(\\d+)([gGmMkK]?)");
    //std::smatch m;
    //bool r = std::regex_match(str, m, pattern);
    //if (r) {
    //    *v = atoll(m.str(1).c_str());
    //    *v *= m.str(2) == "k" || m.str(2) == "K" ? 1024 :
    //          m.str(2) == "m" || m.str(2) == "M" ? 1024*1024 :
    //          m.str(2) == "g" || m.str(2) == "G" ? 1024*1024*1024 : 1;
    //}
    //return r;
    *v = atoll(str.c_str());
    return true;
}

int OverlapFilter::Percentile(const std::vector<int> &data, double percent) {
    assert(0 <= percent && percent <= 100);

    auto minmaxv = std::minmax_element(data.begin(), data.end());

    auto minv = (*minmaxv.first);
    auto maxv = (*minmaxv.second);

    std::vector<int> counts(maxv-minv+1, 0);
    for (auto c : data) {
        counts[c-minv]++;
    }
    
    int accu = 0;
    for (size_t i=0; i<counts.size(); ++i) {
        accu += counts[i];
        if (data.size() * percent / 100 <= accu) {
            return i + minv;
        }
    }
    return maxv;
}

int OverlapFilter::FirstTrough(const std::vector<int> &data, size_t last, size_t k) {
    assert(k % 2 == 1 && data.size() >= k);

    auto minmaxv = std::minmax_element(data.begin(), data.end());

    auto minv = (*minmaxv.first);
    auto maxv = (*minmaxv.second);

    std::vector<int> counts(maxv-minv+1, 0);
    for (auto c : data) {
        counts[c-minv]++;
    }
    
    // calc the starting poistion
    size_t s = 0;
    for (size_t i=1; i<last/10; ++i) {
        if (counts[i-1] > counts[i]) {
            s = i - 1;
            break;
        }
    }

    int value = std::accumulate(counts.begin()+s, counts.begin()+s+k, 0);
    std::pair<size_t, int> best(s, value);
    for (size_t i=s+1; i<counts.size()+k-1 && i<last; ++i) {
        value += -counts[i-1] + counts[i+k-1];
        if (value < best.second*1.00) {
            best.first = i;
            best.second = value;
        } else {
            //best.first = i;
            //best.second = value;
            break;
        }
    }

    assert(best.first >= s);
    size_t bestbest = best.first;
    for (auto i=bestbest+1; i<best.first+k; ++i) {
        if (counts[i] < counts[bestbest]) bestbest = i;
    }

    double best_mean = double(best.second)  / k;
    for (auto i=best.first; i<best.first+k; ++i) {
        // if (counts[i] < counts[bestbest]) bestbest = i;
        if (counts[i] <= best_mean && counts[i] - counts[bestbest] <= counts[bestbest]*0.1) {
            bestbest = i; 
            break;
        }
    }
    return bestbest;
}
